US Airline Flight Route Map

In this post, I summarize how to construct a US airline flight route map. The data come from the Bureau of Transportation Statistics. The first step is to obtain the local airport information: for example, the GIS location, the carriers' market shares and the market structure, and the distance to the nearest hub. The second step is to draw the flight route map.

Airport Information

To get the local airport information, we extract it from the data and use GIS information to calculate distances. The whole process can be divided into three steps.

  1. extract all the flight routes flying into the particular airport
  2. use airport address to get GIS information
  3. organize the information and output the local airport information.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pandas as pd
import geocoder
from geopy import distance
import time

# Maps a state abbreviation to the code of the airport used as that
# state's hub. Add further states here as needed.
DICT_HUB = dict(
    PA="PHL",
    MI="DTW",
    WV="DCA",
    IL="ORD",
)

path = "your path directory"
STATE = input("input the state you want ")
Year = input("input the year and quarter you want")

# Load the market file for the chosen state/period (file name is
# <path><STATE><Year>.csv) together with the airport reference table.
market_pd = pd.read_csv(f"{path}{STATE}{Year}.csv")  # like this
airport_pd = pd.read_csv(f"{path}airport_statistics.csv")

# Discard the last column of the market file.
col = list(market_pd.columns)
market_pd.drop(columns=col[-1], inplace=True)

# Keep only the rows whose destination lies in the selected state.
dest_in_state = market_pd['DEST_STATE_ABR'] == STATE
market_pd_1 = market_pd.loc[dest_in_state]
STATE_airports = market_pd_1['DEST'].unique().tolist()
year = market_pd_1['YEAR'].unique()[0]  # first YEAR value found in the data

# Group the rows by (origin, destination) pair.
group_airport = market_pd_1.groupby(['ORIGIN', 'DEST'])
routes_list = group_airport.groups.keys()

# One output row per destination airport, filled in later.
col_info = [
    'code', 'year', 'name', 'address', 'lat', 'lgt',
    'dis_to_hub0', 'carriers', 'major_airline', 'ratio_1',
]
State_airport_info = pd.DataFrame(index=STATE_airports, columns=col_info)

# Geocode the address of the state's hub airport; raises KeyError when
# STATE has no entry in DICT_HUB.
hub_code = DICT_HUB[STATE]
add_HUB = airport_pd.loc[airport_pd['Code'] == hub_code, 'add'].values[0]
geo_info_HUB = geocoder.google(add_HUB)

## return GIS information and calculate the distance
# Upper bound on geocoding retries so that a persistent failure (bad
# address, missing API key, no network) cannot hang the script forever.
MAX_GEOCODE_RETRIES = 5

for nn_airport in STATE_airports:
    # All rows arriving at this airport, reduced to one row per origin.
    new_gb = pd.concat(
        [group for name, group in group_airport if name[1] == nn_airport]
    )
    new_gb.drop_duplicates(['ORIGIN'], keep='first', inplace=True)

    ## get carrier information from different routes
    # Trim leading/trailing "-", "|" and ":" from the carrier group and
    # keep its last two characters; rows left with an empty string are
    # dropped.
    trimmed_carrier = new_gb['TK_CARRIER_GROUP'].str.strip("-|:")
    new_gb['last_carrier'] = trimmed_carrier.str[-2:]
    new_gb = new_gb[new_gb['last_carrier'] != '']

    ## get market ratio
    carr_pd = pd.DataFrame(new_gb.groupby("last_carrier")['ITIN_ID'].count())
    carr_pd['ratio'] = carr_pd['ITIN_ID'] / carr_pd['ITIN_ID'].sum()
    carr_pd.sort_values(by='ratio', inplace=True)

    carriers = list(zip(carr_pd.index, carr_pd.ratio))
    # The ascending sort puts the carrier with the largest ratio last.
    # Guard against airports for which no carrier rows remain.
    if carriers:
        major_airline, major_ratio = carriers[-1]
    else:
        major_airline, major_ratio = None, None

    ## rest is the geographic information
    ## use geocoder to look up the coordinates
    airport_row = airport_pd.loc[airport_pd['Code'] == nn_airport]
    add_temp = airport_row['add'].values[0]
    airport_name = airport_row['name'].values[0]

    geo_info_temp = geocoder.google(add_temp)
    time.sleep(0.1)

    # Retry with the airport code as the query, a bounded number of times.
    retries = 0
    while not geo_info_temp.ok and retries < MAX_GEOCODE_RETRIES:
        time.sleep(0.5)
        geo_info_temp = geocoder.google(nn_airport)
        retries += 1

    if not geo_info_temp.ok:
        # Leave this airport's row empty rather than blocking the run.
        print("geocoding failed for " + str(nn_airport) + "; row left empty")
        continue

    # Distance in miles between this airport and the hub.
    dis_hub = distance.distance(geo_info_temp.latlng, geo_info_HUB.latlng).miles

    s = pd.Series(
        [
            nn_airport, year, airport_name, add_temp,
            geo_info_temp.lat, geo_info_temp.lng, dis_hub,
            carriers, major_airline, major_ratio,
        ],
        index=col_info,
    )

    State_airport_info.loc[nn_airport,] = s

State_airport_info.reset_index(level=0, inplace=True)

## output to airport information
State_airport_info.to_csv(path + STATE + "_airport_info.csv", index=False)

Draw the airport location map

Once we have the airport information for the different states, we can construct a GIS information table and draw the map. (Currently, I just combine all the airport_info.csv files into airport_gis.csv manually.) The airport_gis.csv file includes the airport information, i.e., GIS information, major carriers, and the market ratio of the major carriers.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import folium
import pandas as pd
import copy
path = "your data path "
data_airport_gis = pd.read_csv(path + "airport_gis.csv")

# Base map. The centre coordinates are arbitrary and can be changed.
map_1 = folium.Map(
    location=[40.8499873, -77.8486889],
    tiles="Mapbox Bright",
    zoom_start=4,
)

# Airports with Major == 0: green plane marker, popup shows the code.
temp_pd = data_airport_gis[data_airport_gis['Major'] == 0].copy()

for lat, lgt, code in zip(temp_pd['lat'], temp_pd['lgt'], temp_pd['code']):
    folium.Marker(
        [lat, lgt],
        popup=code,
        icon=folium.Icon(color='green', prefix="fa", icon="plane"),
    ).add_to(map_1)

# Airports with Major == 1: blue plane marker plus three distance rings.
temp_pd = data_airport_gis[data_airport_gis['Major'] == 1].copy()

# (radius in miles, popup label, colour) of each ring.
ring_specs = [
    (200, "200 miles", 'crimson'),
    (300, "300 miles", '#32aaff'),
    (450, "450 miles", '#7f8eff'),
]

# NOTE(review): the rings are assumed to be drawn once per airport
# (inside this loop) -- confirm against the original layout.
for lat, lgt, code in zip(temp_pd['lat'], temp_pd['lgt'], temp_pd['code']):
    print(code)
    folium.Marker(
        [lat, lgt],
        popup=code,
        icon=folium.Icon(color='blue', prefix="glyphicon", icon="plane"),
    ).add_to(map_1)

    # draw the circles
    for miles, label, ring_color in ring_specs:
        folium.Circle(
            location=[lat, lgt],
            radius=1609 * miles,  # 1609 is used as the miles-to-metres factor
            popup=label,
            color=ring_color,
            fill=True,
            fill_color=ring_color,
        ).add_to(map_1)


map_1.save(path + 'map.html')

Construct flight route data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import sys

import pandas as pd
import geocoder
from geopy import distance
import time

# State abbreviation -> code of the airport treated as that state's hub.
DICT_HUB = {
    "PA": "PHL",
    "MI": "DTW",
    "WV": "DCA",
    "IL": "ORD",
}

path = "your data path"
# STATE and Year are fixed here; the commented lines show the
# interactive alternative.
#STATE=input("input the state you want ")
#Year=input("time period ")
STATE = "PA"
Year = "2000Q1"

# Market file for the chosen state/period and the airport reference table.
market_pd = pd.read_csv(path + STATE + Year + ".csv")

airport_pd = pd.read_csv(path + "airport_statistics.csv")
col = list(market_pd.columns)
market_pd.drop(col[-1], axis=1, inplace=True)  # drop the last column


## keep only the rows whose destination is in the selected state
market_pd_1 = market_pd.loc[market_pd['DEST_STATE_ABR'] == STATE,]
STATE_airports = list(market_pd_1['DEST'].unique())
year = list(market_pd_1['YEAR'].unique())[0]

# Columns kept for the route map. Not included:
#,'ORIGIN','ORIGIN_STATE_ABR','DEST','DEST_STATE_ABR'
col_map = [
    'ITIN_ID', 'MKT_ID', 'AIRPORT_GROUP', 'TK_CARRIER_GROUP',
    'MARKET_FARE', 'MARKET_DISTANCE', 'DISTANCE_GROUP', 'MKT_GEO_TYPE',
]

# Take an explicit copy: new columns are assigned to this frame later,
# and assigning to a selection of market_pd_1 triggers pandas'
# SettingWithCopyWarning.
market_pd_map = market_pd_1[col_map].copy()

# deal with the airport info

def air_cat1(x):
    """Return the second-to-last ':'-separated field of ``str(x)``.

    For a two-field value such as ``"A:B"`` this is the first field; for
    longer values such as ``"A:B:C"`` it is the field just before the
    last one.

    Args:
        x: Any value; it is converted with ``str`` before splitting.

    Returns:
        The second-to-last field as a string.

    Raises:
        IndexError: If ``str(x)`` contains no ':' (only one field).
    """
    # xx[0] and xx[-2] coincide when there are exactly two fields, so a
    # single negative index covers every case.
    return str(x).split(":")[-2]


def air_cat2(x):
    """Return the last ':'-separated field of ``str(x)``.

    Args:
        x: Any value; it is converted with ``str`` before splitting.

    Returns:
        The last field as a string. A value without ':' is returned
        whole (as its string form).
    """
    # xx[1] and xx[-1] coincide when there are exactly two fields, so a
    # single negative index covers every case.
    return str(x).split(":")[-1]


# Split AIRPORT_GROUP into its second-to-last (pos1) and last (pos2) fields.
market_pd_map['pos1'] = market_pd_map['AIRPORT_GROUP'].apply(air_cat1)
market_pd_map['pos2'] = market_pd_map['AIRPORT_GROUP'].apply(air_cat2)

# An airport can occur in both columns. De-duplicate across them so each
# airport is geocoded once and gets exactly one row; duplicate 'Code'
# rows would multiply rows in any later merge on that column.
airport_list = list(pd.unique(
    pd.concat([market_pd_map['pos1'], market_pd_map['pos2']], ignore_index=True)
))
col_info = ['Code', 'name', 'address', 'lat', 'lng']

# Upper bound on geocoding retries so that a persistent failure cannot
# hang the script forever.
MAX_GEOCODE_RETRIES = 5

# GIS process: use address to convert gis
# Rows are collected in a list and turned into a DataFrame once;
# DataFrame.append no longer exists in pandas 2.x.
gis_rows = []
for nn_airport in airport_list:
    airport_row = airport_pd.loc[airport_pd['Code'] == nn_airport]
    add_temp = airport_row['add'].values[0]
    airport_name = airport_row['name'].values[0]

    geo_info_temp = geocoder.google(add_temp)
    time.sleep(1)

    # Retry with "<airport name>, <STATE>" as the query, a bounded
    # number of times.
    retries = 0
    while not geo_info_temp.ok and retries < MAX_GEOCODE_RETRIES:
        time.sleep(1)
        geo_info_temp = geocoder.google(airport_name + ", " + STATE)
        retries += 1

    if geo_info_temp.ok:
        lat, lng = geo_info_temp.lat, geo_info_temp.lng
    else:
        # Keep the airport in the table, but without coordinates.
        print("geocoding failed for " + str(nn_airport) + "; coordinates left empty")
        lat, lng = None, None

    gis_rows.append([nn_airport, airport_name, add_temp, lat, lng])

airport_gis_pd = pd.DataFrame(gis_rows, columns=col_info)

airport_gis_pd.to_csv(path + "airport_PA00Q1_gis.csv", index=False)

# Count the rows (non-null ITIN_ID) for every (pos1, pos2) pair.
group_airport = market_pd_map.groupby(['pos1', 'pos2'])
airport_m_pd = (
    group_airport['ITIN_ID']
    .count()
    .to_frame()
    .rename(columns={'ITIN_ID': 'num_flights'})
)

# Move both index levels back into columns, one level at a time
# (resulting column order: pos2, pos1, num_flights).
airport_m_pd = airport_m_pd.reset_index(level=0).reset_index(level=0)

# Attach coordinates to both ends of each pair: the pos1 airport becomes
# (lat_1, lng_1) and the pos2 airport becomes (lat_2, lng_2).
airport_coords = airport_gis_pd[['Code', 'lat', 'lng']]

merg_fligh_od = airport_m_pd.merge(
    airport_coords, left_on='pos1', right_on='Code', how="left"
).rename(columns={'lat': 'lat_1', 'lng': 'lng_1'})

merg_fligh_od = merg_fligh_od.merge(
    airport_coords, left_on='pos2', right_on='Code', how="left"
).rename(columns={'lat': 'lat_2', 'lng': 'lng_2'})

# Save the results.
merg_fligh_od.to_csv(path + "flight_routes.csv", index=False)

Create Flight Map with plotly

Finally, with all the GIS information, I can construct the flight route map. After looking at different data visualization tools, I decided to use plotly to draw the graph. The following is a simple example of a basic route map.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import plotly.plotly as py
import plotly
import pandas as pd

path = "your data path"

# Load the data (example files): airport coordinates and route counts.
df_airports = pd.read_csv(f"{path}airport_PA00Q1_gis.csv")

df_airports.head()

df_flight_paths = pd.read_csv(f"{path}flight_routes.csv")
df_flight_paths.head()

# Marker trace: one small red dot per airport; hovering shows its name.
airport_marker = {
    'size': 2,
    'color': 'rgb(255, 0, 0)',
    'line': {
        'width': 3,
        'color': 'rgba(68, 68, 68, 0)',
    },
}

airports = [{
    'type': 'scattergeo',
    'locationmode': 'USA-states',
    'lon': df_airports['lng'],
    'lat': df_airports['lat'],
    'hoverinfo': 'text',
    'text': df_airports['name'],
    'mode': 'markers',
    'marker': airport_marker,
}]

## add gis info
# One line trace per route. Opacity is the route's num_flights relative
# to the largest num_flights in the table.
# The maximum is loop-invariant, so it is computed once up front.
max_flights = float(df_flight_paths['num_flights'].max())

flight_paths = [
    dict(
        type='scattergeo',
        locationmode='USA-states',
        lon=[lng_1, lng_2],
        lat=[lat_1, lat_2],
        mode='lines',
        line=dict(
            width=1,
            color='red',
        ),
        opacity=float(num_flights) / max_flights,
    )
    for lng_1, lat_1, lng_2, lat_2, num_flights in zip(
        df_flight_paths['lng_1'],
        df_flight_paths['lat_1'],
        df_flight_paths['lng_2'],
        df_flight_paths['lat_2'],
        df_flight_paths['num_flights'],
    )
]
## map description: title, no legend, and the geo settings
geo_settings = {
    'scope': 'north america',
    'projection': {'type': 'azimuthal equal area'},
    'showland': True,
    'landcolor': 'rgb(243, 243, 243)',
    'countrycolor': 'rgb(204, 204, 204)',
}

layout = {
    'title': '2000 Q1 flight paths<br>(PA)',
    'showlegend': False,
    'geo': geo_settings,
}

# Draw the map with plotly: route lines first, then the airport markers,
# written to an HTML file.
fig = {'data': flight_paths + airports, 'layout': layout}
plotly.offline.plot(fig, filename=path + 'd3-flight-paths.html')

Resources